L05 Annotation & Positioning

Data Visualization (STAT 302)

Author

JULIANNA WANG

Overview

The goal of this lab is to explore methods for annotating and positioning elements in ggplot2 plots. This lab also makes greater use of the scale_* functions, which are part of our next reading. In fact, students may find it useful to read through chapter 11, Colour scales and legends.

Datasets

We’ll be using the blue_jays.rda, titanic.rda, Aus_athletes.rda, and tech_stocks.rda datasets.

# Load package(s)
library(tidyverse)
library(patchwork)
library(cowplot)
library(ggrepel)

# Load data
# Each load() call restores the object(s) saved in the .rda file into the
# current environment. The exercises below reference blue_jays, titanic,
# Aus_athletes and tech_stocks -- presumably the objects stored in these
# files (TODO confirm the saved object names match).
# Paths are relative, so the working directory must contain the data/ folder.
load('data/blue_jays.rda')
load('data/titanic.rda')
load('data/Aus_athletes.rda')
load('data/tech_stocks.rda')

Exercises

Complete the following exercises.

Exercise 1

Using the blue_jays.rda dataset, recreate the following graphic as precisely as possible.

Hints:

  • Transparency is 0.8
  • Point size 2
  • Create a label_info dataset that is a subset of original data, just with the 2 birds to be labeled
  • Shift label text horizontally by 0.5
  • See ggplot2 textbook 8.3 building custom annotations
  • Annotation size is 4
  • Classic theme
Solution
Code
# Anchor for the in-plot caption.
# range() returns c(min, max), so element [1] is the minimum and [2] the maximum.
x_range <- range(blue_jays$Mass)
y_range <- range(blue_jays$Head)
caption <- 'Head length versus body mass for 123 blue jays'

# Rows for the two birds that receive a text label
label_info <- filter(blue_jays, BirdID %in% c('1142-05914', '702-90567'))

# Scatterplot of head length against body mass, coloured by KnownSex
ggplot(blue_jays, aes(Mass, Head, color = KnownSex)) +
  geom_point(alpha = 0.8, size = 2, show.legend = FALSE) +
  # label only the two selected birds, shifted 0.5 units to the right
  geom_text(
    aes(label = KnownSex),
    data = label_info,
    nudge_x = 0.5,
    show.legend = FALSE
  ) +
  # caption sits at (min Mass, max Head); hjust = 0 and vjust = 1 pin the
  # text's top-left corner to that point
  annotate(
    'text',
    x = x_range[1],
    y = y_range[2],
    label = caption,
    hjust = 0,
    vjust = 1,
    size = 4
  ) +
  labs(x = 'Body mass (g)', y = 'Head length (mm)') +
  theme_classic()

Exercise 2

Using the tech_stocks dataset, recreate the following graphics as precisely as possible. Use the column price_indexed.

Plot 1

Hints:

  • Create a label_info dataset that is a subset of original data, just containing the last day’s information for each of the 4 stocks
  • serif font
  • Annotation size is 4
Solution
Code
# Caption anchor: range() returns c(min, max), so the caption is placed at
# the earliest date (x_range[1]) and the highest indexed price (y_range[2])
x_range <- range(tech_stocks$date)
y_range <- range(tech_stocks$price_indexed)
caption <- 'Stock price over time for four major tech companies'

# Row(s) with the latest date for each company; used to position the labels
label_info <- slice_max(tech_stocks, date, by = company)

# Line chart of indexed price over time, one coloured line per company
ggplot(tech_stocks, aes(date, price_indexed)) +
  geom_line(aes(color = company), show.legend = FALSE) +
  # company name written at the last observation of each series
  geom_text(aes(label = company), data = label_info) +
  # `family` selects the font; hjust = 0 / vjust = 1 pin the caption's
  # top-left corner to the anchor point
  annotate(
    'text',
    x = x_range[1],
    y = y_range[2],
    label = caption,
    hjust = 0,
    vjust = 1,
    size = 4,
    family = 'serif'
  ) +
  labs(x = NULL, y = 'Stock price, indexed') +
  theme_minimal()

Plot 2

Hints:

  • Package ggrepel
    • box.padding is 0.6
    • Minimum segment length is 0
    • Horizontal justification is to the right
    • seed of 9876
  • Annotation size is 4
  • serif font
Solution
Code
# Caption anchor: range() returns c(min, max), so the caption is placed at
# the earliest date (x_range[1]) and the highest indexed price (y_range[2])
y_range <- range(tech_stocks$price_indexed)
x_range <- range(tech_stocks$date)
caption <- 'Stock price over time for four major tech companies'

# Row(s) with the latest date for each company; used to position the labels
label_info <- tech_stocks |>
  slice_max(date, by = company)

# Line chart of indexed price over time with repelled company labels
ggplot(data = tech_stocks, aes(x = date, y = price_indexed)) +
  geom_line(aes(color = company), show.legend = FALSE) +
  # min.segment.length = 0 implements the "minimum segment length is 0" hint,
  # so a connector segment is drawn for every label however short it is.
  # (The previous `segment.size = 0.5` only set the segment line width, which
  # is already the default, and left the minimum length at its default.)
  # seed makes the repel layout reproducible.
  geom_text_repel(
    data = label_info,
    aes(label = company),
    box.padding = 0.6,
    min.segment.length = 0,
    hjust = 1,
    seed = 9876
  ) +
  xlab(NULL) +
  ylab('Stock price, indexed') +
  # `family` selects the font; hjust = 0 / vjust = 1 pin the caption's
  # top-left corner to the anchor point
  annotate(
    geom = 'text',
    x = x_range[1],
    y = y_range[2],
    label = caption,
    hjust = 0,
    vjust = 1,
    size = 4,
    family = 'serif'
  ) +
  theme_minimal()

Exercise 3

Using the titanic.rda dataset, recreate the following graphic as precisely as possible.

Hints:

  • Create a new variable that uses died and survived as levels/categories
  • Hex colors: #D55E00D0, #0072B2D0 (no alpha is being used)
Solution
Code
# Add a text version of the outcome: 'survived' where survived == 1,
# 'died' otherwise (used as the facet row variable below)
titanic <- titanic |>
  mutate(survival_status = ifelse(survived == 1, 'survived', 'died'))

# Bar chart of passenger counts by sex, faceted into a grid with
# survival status on the rows and passenger class on the columns
ggplot(titanic, aes(sex, fill = sex)) +
  geom_bar(position = 'dodge') +
  facet_grid(rows = vars(survival_status), cols = vars(class)) +
  # the hex codes carry their own alpha channel (trailing D0), so no
  # separate alpha setting is used; the fill legend is suppressed
  scale_fill_manual(
    values = c('#D55E00D0', '#0072B2D0'),
    guide = 'none'
  ) +
  theme_minimal()

Exercise 4

Use the athletes_dat dataset — extracted from Aus_athletes.rda — to recreate the following graphic as precisely as possible. Create the graphic twice: once using patchwork and once using cowplot.

Code
# Sports that appear with BOTH sexes in the data:
# reduce to unique sport/sex pairs, count how many sexes each sport has,
# and keep the names of the sports where that count is 2
both_sports <- Aus_athletes |>
  distinct(sport, sex) |>
  count(sport, name = "n_sexes") |>
  filter(n_sexes == 2) |>
  pull(sport)

# Analysis dataset: athletes in those sports only, with the two track
# events ("track (400m)" and "track (sprint)") merged into a single
# "track" category; every other sport keeps its original name
athletes_dat <- Aus_athletes |>
  filter(sport %in% both_sports) |>
  mutate(
    sport = case_when(
      sport %in% c("track (400m)", "track (sprint)") ~ "track",
      .default = sport
    )
  )

Hints:

  • Build each plot separately
  • Bar plot: lower limit 0, upper limit 95
  • Bar plot: shift bar labels by 5 units and top justify
  • Bar plot: label size is 5
  • Bar plot: #D55E00D0 & #0072B2D0 — no alpha
  • Scatterplot: #D55E00D0 & #0072B2D0 — no alpha
  • Scatterplot: filled circle with “white” outline; size is 3
  • Scatterplot: rcc is red blood cell count; wcc is white blood cell count
  • Boxplot: outline #D55E00 and #0072B2; shading #D55E0040 and #0072B240
  • Boxplot: should be made narrower; 0.5
  • Boxplot: Legend is in top-right corner of bottom plot
  • Boxplot: Space out labels c("female ", "male")
  • Boxplot: Legend shading matches hex values for top two plots

Using patchwork

Solution
Code
# Number of athletes per sex; these counts are printed inside the bars
label_info <- count(athletes_dat, sex)

# Bar chart: number of athletes of each sex
barplot <- ggplot(athletes_dat, aes(sex, fill = sex)) +
  geom_bar() +
  # count labels: vjust = 1 top-aligns the text and nudge_y = -5 moves it
  # 5 units down from the bar height (y = n)
  geom_text(
    aes(y = n, label = n),
    data = label_info,
    nudge_y = -5,
    vjust = 1,
    size = 5
  ) +
  scale_x_discrete(name = NULL, labels = c('female', 'male')) +
  # fixed 0-95 axis with no padding so the bars start at the axis
  scale_y_continuous(
    name = 'number',
    limits = c(0, 95),
    expand = c(0, 0)
  ) +
  # hex codes include their own alpha channel; fill legend is suppressed
  scale_fill_manual(
    values = c('#D55E00D0', '#0072B2D0'),
    guide = 'none'
  ) +
  theme_minimal()

# Scatterplot of wcc against rcc, one point per athlete
scatterplot <- ggplot(athletes_dat, aes(rcc, wcc)) +
  # shape 21 is a fillable circle: fill is mapped to sex, while the
  # outline colour is fixed to white
  geom_point(
    aes(fill = factor(sex)),
    shape = 21,
    color = 'white',
    size = 3
  ) +
  # hex codes include their own alpha channel; fill legend is suppressed
  scale_fill_manual(
    values = c('#D55E00D0', '#0072B2D0'),
    guide = 'none'
  ) +
  labs(x = 'RBC count', y = 'WBC count') +
  theme_minimal()

# Boxplot of percent body fat (pcBfat) by sport, split by sex.
# Outline colour and fill shading are both mapped to sex; only the fill
# legend is shown, placed in the top-right corner of the panel.
boxplot <- ggplot(data = athletes_dat, aes(x = sport, y = pcBfat, color = sex, fill = sex)) +
  # width = 0.5 makes the boxes narrower
  geom_boxplot(width = 0.5) +
  # solid outline colours; this legend is suppressed
  scale_color_manual(values = c('#D55E00', '#0072B2'), guide = 'none') +
  # light shading inside the boxes (alpha channel 40 in the hex codes).
  # `labels` is spelled out in full: the previous `label =` only worked via
  # partial argument matching after being forwarded through `...`.
  # The trailing spaces after "female" space the two legend entries apart.
  scale_fill_manual(
    values = c('#D55E0040', '#0072B240'),
    labels = c("female    ", "male")
  ) +
  xlab(NULL) +
  ylab('% body fat') +
  theme_minimal() +
  # anchor the legend's top-right corner at the top-right of the plot and
  # lay its keys out horizontally.
  # NOTE(review): a numeric legend.position is deprecated from ggplot2 3.5.0
  # in favour of legend.position = "inside" + legend.position.inside; left
  # unchanged here to stay compatible with older ggplot2 -- confirm version.
  theme(
    legend.justification = c(1, 1),
    legend.position = c(1, 1),
    legend.direction = 'horizontal'
  ) +
  # legend keys: drop the outline and use the stronger shading of the
  # bar chart / scatterplot instead of the light boxplot fill
  guides(
    fill = guide_legend(
      override.aes = list(color = NA, fill = c('#D55E00D0', '#0072B2D0'))
    )
  )

# formatting using patchwork:
# `+` combines the bar chart and scatterplot into the top row, and
# `/` stacks the boxplot underneath that row
(barplot + scatterplot) / boxplot

Using cowplot

Use cowplot::plot_grid() to combine them.

Solution
Code
# Nested grids: the inner plot_grid() builds the top row (bar chart beside
# scatterplot, legends switched off), the outer one stacks that row above
# the boxplot in a single column
plot_grid(
  plot_grid(
    barplot + theme(legend.position = 'none'),
    scatterplot + theme(legend.position = 'none'),
    nrow = 1
  ),
  boxplot,
  ncol = 1
)

Exercise 5

Create the following graphic using patchwork.

Hints:

  • Use plots created in Exercise 4
  • inset theme is classic
    • Useful values: 0, 0.45, 0.75, 1
  • plot annotation "A"
Solution
Code
# Scatterplot with the bar chart drawn as an inset using patchwork.
# The classic theme is applied to the bar chart itself before it is inset;
# left/bottom/right/top give the inset's edges as fractions of the plot
# (here: the bottom-right corner). plot_annotation() adds "A"-style tags.
scatterplot +
  inset_element(
    barplot + theme_classic(),
    left = 0.75,
    bottom = 0,
    right = 1,
    top = 0.45
  ) +
  plot_annotation(tag_levels = "A")